// Build kmeans clustering
// Export the (labexid, ape, prox) rows of the proximity file to CSV, then call
// the external R script kmeans_clusters.R on that export.
use "${tmp}/Proximity_naflabex.dta", clear
	
keep labexid ape prox
sort labexid ape
export delimited "${tmp}/labexnaf_prox.csv", delim(",") replace

// Arguments handed to the R script.
// NB: the local macro outpath defined here is distinct from the global macro
// outpath used as the output directory of the LaTeX tables further down.
local inpath = "${tmp}/"
local outpath = "${tmp}/labexkmean.csv"
local file = "${utils}/kmeans_clusters.R"


// Run: <RPath> <script> <input dir> <output csv> 12, with stdout and stderr
// redirected to rlog.txt in the current working directory.
// The trailing argument 12 is presumably the number of clusters - confirm in
// kmeans_clusters.R.
// The original line ended with an unmatched double quote after the redirection,
// which left the quoting unbalanced; it is removed here.
// NOTE(review): on Windows, cmd.exe may need the whole command wrapped in one
// extra pair of quotes when the executable path itself is quoted - confirm on
// the target platform.
shell "${RPath}" "`file'" "`inpath'" "`outpath'" 12 > rlog.txt 2>&1


// Total proximity per LabEx, saved to a temporary file that is merged onto the
// main data set later in this script.
tempfile prox
use "${tmp}/Proximity_naflabex.dta", clear
	// total() is the documented egen function; sum() is its legacy synonym.
	// NOTE(review): the grouping variable is written labex while the key that
	// is kept is labexid; presumably labex resolves to labexid through
	// variable abbreviation - confirm.
	bys labex: egen totprox = total(prox)
	keep labexid totprox
	// One row per (labexid, totprox) pair
	gduplicates drop
save `prox'

import delimited "${tmp}/labexkmean.csv", delimiters(",") encoding(utf8) clear
	// Keep one row per (labex, kmeans) pair from the R output and rename the
	// key so that it matches the Stata files merged below.
	keep labex kmeans
	gduplicates drop
	sort kmeans labex
	rename labex labexid

	// Attach the LabEx-level data sets.
	// NOTE(review): the second 1:m merge requires labexid to still identify
	// rows uniquely in memory after the first one - presumably
	// main_labex_with_ctrf holds one row per labexid; verify.
	merge 1:m labexid using "${tmp}/main_labex_with_ctrf", nogenerate keep(using match)
	merge 1:m labexid using "${data}/source/main_labex", nogenerate keep(master match) keepusing(year fund_req)
	merge m:1 labexid using "${data}/utils/labexcom", nogenerate keep(master match)
	merge m:1 labexid using `prox', nogenerate keep(master match)

	// LabEx absent from the proximity file get a total proximity of zero
	replace totprox = 0 if missing(totprox)

	// Recode ranges of depcom to a single code before the depcom-level merge
	// (presumably Paris / Marseille / Lyon arrondissement codes folded into
	// the city-level commune code - confirm).
	replace depcom = 75056 if floor(depcom/1000) == 75
	replace depcom = 13055 if inrange(depcom,13201,13216)
	replace depcom = 69123 if inrange(depcom,69381,69389)

	// NOTE(review): this path uses "Utils" whereas the labexcom merge above
	// uses "utils"; one of the two will fail on a case-sensitive file system.
	merge m:1 depcom using "${data}/Utils/depcom_ZE_mapping", nogenerate keep(master match)
	drop if note_1 == 0
	

	// Rows without a k-means assignment are pooled into one extra cluster
	// whose id is the largest observed cluster id plus one.
	su kmeans
	replace kmeans = r(max) + 1 if mi(kmeans)

	// NOTE(review): this sort is immediately superseded by the next gsort; it
	// is left in place so that the run is unchanged.
	gsort - note_totale

	// Ranks by decreasing total grade within each group. Rows tied on
	// note_totale receive an arbitrary relative order from gsort.
	gsort kmeans  year - note_totale
	by kmeans year: gen rk_disc = _n
	gsort region  year - note_totale
	by region  year: gen rk_reg = _n
	gsort region kmeans  year - note_totale
	// by (not bysort), as for the two ranks above: the rank must use the order
	// produced by gsort, and by fails loudly instead of re-sorting if the data
	// are not already sorted on the group variables.
	by region kmeans  year: gen rk_discreg = _n

	// Indicator for a total grade of at least 30.
	// NOTE(review): in Stata a missing note_totale compares as larger than any
	// number, so such rows would get above30 = 1 - confirm that none exist.
	gen above30 = (note_totale >= 30)

	
	// Labels of the seven criterion grades, applied in a loop
	local gradelab1 "Team quality"
	local gradelab2 "Project's scientific ambition"
	local gradelab3 "Innovation and impact"
	local gradelab4 "Teaching quality"
	local gradelab5 "Management quality"
	local gradelab6 "Partner univ. joint strategy"
	local gradelab7 "Adequation ambition / funding"
	forvalues g = 1/7 {
		label variable note_`g' "Grade: `gradelab`g''"
	}

	// Remaining labels; the LaTeX math fragments are part of the label text
	label variable note_totale "Total Grade"
	label variable totprox "Total proximity w/ industry (our measure)"
	label variable above30 "P(Total Grade $>$ 30)"
	label variable rk_disc "Rank within sci. discipline"
	label variable rk_reg "Rank within region"
	label variable rk_discreg "Rank within sci. discipline $\times$ region"

	// Total grade rounded to the nearest integer
	gen note_r = round(note_totale)


// Table A3
// Four regressions of accept on total proximity, the above-30 indicator and
// progressively richer grade / rank controls, exported with estout.
	
	// Regressor groups shared across the specifications
	local a3_core   "accept totprox above30"
	local a3_grades "note_1-note_7"
	local a3_ranks  "rk_disc rk_reg rk_discreg"

	// (1) total grade only, year fixed effects
	reghdfe `a3_core' note_totale, absorb(year) vce(robust)
	estimates store m1
	// (2) the seven criterion grades
	reghdfe `a3_core' `a3_grades', absorb(year) vce(robust)
	estimates store m2
	// (3) adds the three ranks
	reghdfe `a3_core' `a3_grades' `a3_ranks', absorb(year) vce(robust)
	estimates store m3
	// (4) adds two more absorbed factors.
	// NOTE(review): reg and km are presumably abbreviations of region and
	// kmeans - confirm; they are resolved here, before the km_ dummies are
	// generated further down in the script.
	reghdfe `a3_core' `a3_grades' `a3_ranks', absorb(year reg km) vce(robust)
	estimates store m4
	
	// The output directory is the global macro outpath, which is not set in
	// this part of the script (presumably defined by the calling do-file).
	estout m1 m2 m3 m4 using "${outpath}/TableA3.tex", ///
		cells(b(star fmt(%9.3f)) se(par)) ///
		stats(r2 r2_a r2_a_within N, fmt(%9.3f %9.3f %9.3f %9.0g) labels(R2 AdjR2 WithinR2 Observations)) ///
		legend label collabels(none) style(tex) drop(_cons) starlevels(* .1 ** 0.05 *** 0.01) replace
        

// Table A4
// Balance table: candidates with a rounded total grade of 29 versus 31 (grade
// 30 is left out). For each variable a two-sample t test by above30 gives one
// row of group means with the p-value and one row of group standard
// deviations. Rows are written to tableA4.tex in the global outpath directory.
	// NOTE(review): 13 is presumably the extra cluster holding LabEx without a
	// k-means assignment (largest cluster id + 1 when there are 12 clusters) -
	// confirm.
	drop if kmean == 13
	gen donut = (note_r == 30 )

	tabstat accept note_? if !donut & inlist(note_r, 29,31), by(above30) s(mean) nototal long  col(stat)
	cap tab kmeans, gen(km_)

	// From here on each criterion grade is expressed as a share of the sum of
	// the seven criterion grades.
	gen foo = note_1+note_2+note_3+note_4+note_5+note_6+note_7
	forvalues i = 1/7 {
		replace note_`i' = note_`i'/ foo
	}
	
	gen log_fund_req = log(fund_req)

	local labcrit1 = "Team quality"
	local labcrit2 = "Project's scientific ambition"
	local labcrit3 = "Innovation and impact"
	local labcrit4 = "Teaching quality"
	local labcrit5 = "Management quality"
	local labcrit6 = "Partner university joint strategy"
	local labcrit7 = "Adequation ambition / funding"

	// Estimation sample shared by every test below
	local sample "!donut & inrange(note_r, 29,31)"

	// Rows are stored in locals row0, row1, ... indexed by a running counter.
	// The original hard-coded indices made the "Rank" header overwrite row17,
	// i.e. the standard-deviation row of criterion 7.
	local nrow = 0
	local row`nrow' = "& Grade of 29 & Grade of 31 & Student test of equality \\ "

	ttest accept if `sample', by(above30)
	local ++nrow
	local row`nrow' = "Proba. to be funded & `=string(r(mu_1),  "%9.3f")' & `=string(r(mu_2),  "%9.3f")' & [p=`=string(r(p),  "%9.3f")'] \\"
	local ++nrow
	local row`nrow' = "&(`=string(r(sd_1),  "%9.3f")') & (`=string(r(sd_2),  "%9.3f")') & \\ "

	local ++nrow
	local row`nrow' = "Share of total grade attributed to \\"

	forvalues i = 1/7 {
		ttest note_`i' if `sample', by(above30)
		local ++nrow
		local row`nrow' = "\hspace{1cm} `labcrit`i'' (crit. `i') & `=string(r(mu_1),  "%9.3f")' & `=string(r(mu_2),  "%9.3f")' & [p=`=string(r(p),  "%9.3f")'] \\"
		local ++nrow
		local row`nrow' = "&(`=string(r(sd_1),  "%9.3f")') & (`=string(r(sd_2),  "%9.3f")') & \\ "
	}

	local ++nrow
	local row`nrow' = "Rank \\"

	// Remaining variables, in table order, with their row labels
	local balvars rk_disc rk_reg rk_discreg totprox log_fund_req
	local ballab1 "\hspace{1cm} Within discipline"
	local ballab2 "\hspace{1cm} Within region"
	local ballab3 "\hspace{1cm} Within discipline x region"
	local ballab4 "Total proximity"
	local ballab5 "Funding required (log)"
	local b = 0
	foreach v of local balvars {
		local ++b
		ttest `v' if `sample', by(above30)
		local ++nrow
		local row`nrow' = "`ballab`b'' & `=string(r(mu_1),  "%9.3f")' & `=string(r(mu_2),  "%9.3f")' & [p=`=string(r(p),  "%9.3f")'] \\"
		local ++nrow
		local row`nrow' = "&(`=string(r(sd_1),  "%9.3f")') & (`=string(r(sd_2),  "%9.3f")') & \\ "
	}

	// Group sizes. ttest stores them as r(N_1) and r(N_2); the original line
	// printed the literal text r(N1) and r(N2) and left the total empty.
	// These come from the last test above (log_fund_req), so observations with
	// a missing log_fund_req are not counted.
	local ++nrow
	local row`nrow' = "Nb of LabEx candidates & `=r(N_1)' & `=r(N_2)' & `=r(N_1)+r(N_2)' \\"

	// Release the handle if an earlier, interrupted run left it open
	capture file close myfile
	file open myfile using "${outpath}/tableA4.tex", write text replace
	forvalues i = 0/`nrow' {
		file write myfile " `row`i''" _n
	}
	file close myfile
	
	